/* Uses two-way FE estimation output to create a pent-year level data set of the labour
distribution across WFE quartiles. */



#delimit ;

* start from an empty session and turn off output paging;
clear all;
set more off;

* name stem used for the saved data set and quoted in its notes;
local outfile "GWgap_2wFE_fagg_v4";

* display the run date and time;
di _n "$S_DATE $S_TIME";





*******************************************************************************;



/* Load the person-level extract from CSV. stringc(1) forces the first column
   to be read as a string. */
import delimited using "Wgap_v3_fpers.csv", varn(1) case(preserve)
	stringc(1) clear;

* drop records with a NULL pent or a missing March year;
drop if pent=="NULL" | March_yr==.;
compress;
save temp_wfe_izi, replace;


* merging on their personal wfes;

* wfe_t - records that do not pick one up are dropped straight away;
merge m:1 snz_uid using GWgap_2wFE_extract_v4_wfe_t, keep(master match) nogen;
drop if wfe_t==.;

* the m and f extracts both name their variable wfe, so rename after each merge;
merge m:1 snz_uid using GWgap_2wFE_extract_v4_wfe_m, keep(master match) nogen;
rename wfe wfe_m;

merge m:1 snz_uid using GWgap_2wFE_extract_v4_wfe_f, keep(master match) nogen;
rename wfe wfe_f;

/* Drop records that have none of the three wfes. The last term used to be a
   bare wfe_f, which Stata evaluates as wfe_f!=0 rather than as a missing test.
   NOTE(review): records with wfe_t==. are already removed above, so this
   drop cannot remove anything at present - confirm whether the earlier drop
   on wfe_t alone is intended. */
drop if wfe_t==. & wfe_m==. & wfe_f==.;

save temp_fagg, replace;

foreach g in t m f {;

	/* For wfe type g (t, m or f): assign records to quartiles of that wfe,
	   sum mon_wkd by pent, March_yr and quartile, and save the share of each
	   pent-March_yr total that falls in each quartile to a temp file for g. */

	/* Text for the variable notes. Only the t pass uses the wfe described as
	   pooling male and female, so the m and f passes get their own wording
	   instead of being labelled as pooled. */
	local wfe_src "is from our FE estimation that pools male and female";
	if "`g'"!="t" local wfe_src "is our gender-specific wfe (group `g'), not the pooled male and female one";

	* load only the records that have this wfe;
	use pent March_yr mon_wkd wfe_`g' if wfe_`g'!=. using temp_fagg, clear;

	* quartile cut-offs are taken over all loaded records together, unweighted;
	xtile wfe_`g'_izi_q = wfe_`g', n(4);
	compress;

	* one row per pent-March_yr with a mon_wkd total for each quartile;
	collapse (sum) mon_wkd, by(pent March_yr wfe_`g'_izi_q);
	reshape wide mon_wkd, i(pent March_yr) j(wfe_`g'_izi_q);

	/* Quartiles absent from a pent-March_yr come out of the reshape as missing
	   and are set to zero. Dividing by 12 and renaming to yr_wkd looks like a
	   months to years conversion, going by the variable names. */
	forvalues i = 1/4 {;
		replace mon_wkd`i' = 0 if mon_wkd`i'==.;
		replace mon_wkd`i' = mon_wkd`i'/12;
		rename mon_wkd`i' yr_wkd`i';
	};

	* total across the four quartiles is the denominator for the shares;
	gen L_hc_wfe_izi = yr_wkd1 + yr_wkd2 + yr_wkd3 + yr_wkd4;
	forvalues i = 1/4 {;
		gen wfeq`i'_hc_izi = yr_wkd`i'/L_hc_wfe_izi;
		label var wfeq`i'_hc_izi "Fraction of firm head count in v4 CCK wfe quartile `i'";
		notes wfeq`i'_hc_izi: Ignores workers with missing wfe. CCK wfe `wfe_src'.;
	};

	* keep the shares only and prefix them with the wfe type before saving;
	keep pent March_yr wfeq*;
	rename wfeq* wfe_`g'q*;
	rename March_yr year;
	label var year "March year";
	compress;
	save temp_`g'fagg, replace;

};

* start from the t shares, which lose the t in their names;
use temp_tfagg, clear;
rename wfe_tq* wfeq*;

* add the m and f shares, keeping every pent-year found on either side;
foreach g in m f {;
	merge 1:1 pent year using temp_`g'fagg, keep(master match using) nogen;
};

* record provenance in the data set notes, then write the output file;
notes: `outfile'.dta was created by `outfile'.do at $S_TIME on $S_DATE. It is pent-March year level
data set that gives the labour distribution across wfe quartiles based our CCK wfes pooling genders
and for each gender.;
save `outfile', replace;


